library(tidyverse)
library(lubridate)
library(RColorBrewer)
# Daily JHU CSSE report for 2020-03-11. The slash-style column names are
# replaced with dotted names so they can be referenced without backticks.
report_03_11_2020 <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-11-2020.csv")),
  Country.Region = `Country/Region`,
  Province.State = `Province/State`
)
## Parsed with column specification:
## cols(
## `Province/State` = col_character(),
## `Country/Region` = col_character(),
## `Last Update` = col_datetime(format = ""),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Latitude = col_double(),
## Longitude = col_double()
## )
# Preview the first rows
head(report_03_11_2020)
## # A tibble: 6 x 8
## Province.State Country.Region `Last Update` Confirmed Deaths Recovered
## <chr> <chr> <dttm> <dbl> <dbl> <dbl>
## 1 Hubei China 2020-03-11 10:53:02 67773 3046 49134
## 2 <NA> Italy 2020-03-11 21:33:02 12462 827 1045
## 3 <NA> Iran 2020-03-11 18:52:03 9000 354 2959
## 4 <NA> Korea, South 2020-03-11 21:13:18 7755 60 288
## 5 France France 2020-03-11 22:53:03 2281 48 12
## 6 <NA> Spain 2020-03-11 20:53:02 2277 54 183
## # … with 2 more variables: Latitude <dbl>, Longitude <dbl>
# Column types and dimensions
str(report_03_11_2020)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 216 obs. of 8 variables:
## $ Province.State: chr "Hubei" NA NA NA ...
## $ Country.Region: chr "China" "Italy" "Iran" "Korea, South" ...
## $ Last Update : POSIXct, format: "2020-03-11 10:53:02" "2020-03-11 21:33:02" ...
## $ Confirmed : num 67773 12462 9000 7755 2281 ...
## $ Deaths : num 3046 827 354 60 48 ...
## $ Recovered : num 49134 1045 2959 288 12 ...
## $ Latitude : num 31 43 32 36 46.2 ...
## $ Longitude : num 112.27 12 53 128 2.21 ...
## - attr(*, "spec")=
## .. cols(
## .. `Province/State` = col_character(),
## .. `Country/Region` = col_character(),
## .. `Last Update` = col_datetime(format = ""),
## .. Confirmed = col_double(),
## .. Deaths = col_double(),
## .. Recovered = col_double(),
## .. Latitude = col_double(),
## .. Longitude = col_double()
## .. )
# Daily JHU CSSE report for 2020-03-24. This file uses underscore-style column
# names; FIPS and Admin2 are dropped and the region columns are renamed to the
# same dotted names used for the 03-11 report.
report_03_24_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-24-2020.csv")) %>%
  select(-FIPS, -Admin2) %>%
  rename(
    Country.Region = Country_Region,
    Province.State = Province_State
  )
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
# Preview the first rows
head(report_03_24_2020)
## # A tibble: 6 x 10
## Province.State Country.Region Last_Update Lat Long_ Confirmed
## <chr> <chr> <dttm> <dbl> <dbl> <dbl>
## 1 South Carolina US 2020-03-24 23:37:31 34.2 -82.5 1
## 2 Louisiana US 2020-03-24 23:37:31 30.3 -92.4 2
## 3 Virginia US 2020-03-24 23:37:31 37.8 -75.6 1
## 4 Idaho US 2020-03-24 23:37:31 43.5 -116. 19
## 5 Iowa US 2020-03-24 23:37:31 41.3 -94.5 1
## 6 Kentucky US 2020-03-24 23:37:31 37.1 -85.3 0
## # … with 4 more variables: Deaths <dbl>, Recovered <dbl>, Active <dbl>,
## # Combined_Key <chr>
# Column types and dimensions
str(report_03_24_2020)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 3417 obs. of 10 variables:
## $ Province.State: chr "South Carolina" "Louisiana" "Virginia" "Idaho" ...
## $ Country.Region: chr "US" "US" "US" "US" ...
## $ Last_Update : POSIXct, format: "2020-03-24 23:37:31" "2020-03-24 23:37:31" ...
## $ Lat : num 34.2 30.3 37.8 43.5 41.3 ...
## $ Long_ : num -82.5 -92.4 -75.6 -116.2 -94.5 ...
## $ Confirmed : num 1 2 1 19 1 0 1 0 25 0 ...
## $ Deaths : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Recovered : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Active : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined_Key : chr "Abbeville, South Carolina, US" "Acadia, Louisiana, US" "Accomack, Virginia, US" "Ada, Idaho, US" ...
## - attr(*, "spec")=
## .. cols(
## .. FIPS = col_character(),
## .. Admin2 = col_character(),
## .. Province_State = col_character(),
## .. Country_Region = col_character(),
## .. Last_Update = col_datetime(format = ""),
## .. Lat = col_double(),
## .. Long_ = col_double(),
## .. Confirmed = col_double(),
## .. Deaths = col_double(),
## .. Recovered = col_double(),
## .. Active = col_double(),
## .. Combined_Key = col_character()
## .. )
# Dot plot of confirmed cases per US state/province in the 2020-03-11 report,
# with states ordered by case count.
report_03_11_2020 %>%
  filter(Country.Region == "US") %>%
  ggplot(aes(x = Confirmed, y = reorder(Province.State, Confirmed))) +
  geom_point() +
  ggtitle("Confirmed cases for each US State") +
  # The y axis shows Province.State values, so label it accordingly
  # (it was previously labelled "Country/Region").
  ylab("Province/State") +
  xlab("Confirmed Cases")
# ggsave() writes the most recently created plot
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_US_march_11.png", width = 10, height = 10, dpi = "screen")
# Total deaths per country/region in the 2020-03-11 report, largest first.
report_03_11_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(-Deaths)
## # A tibble: 116 x 2
## Country.Region Deaths
## <chr> <dbl>
## 1 China 3161
## 2 Italy 827
## 3 Iran 354
## 4 Korea, South 60
## 5 Spain 54
## 6 France 48
## 7 US 36
## 8 Japan 15
## 9 United Kingdom 8
## 10 Cruise Ship 7
## # … with 106 more rows
# Horizontal bar chart of the 20 countries/regions with the highest death
# totals in the 2020-03-11 report.
report_03_11_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(desc(Deaths)) %>%
  head(20) %>%
  ggplot(aes(Deaths, reorder(Country.Region, Deaths))) +
  geom_col() +
  labs(
    title = "The 20 countries with the most reported COV19-related deaths",
    x = "Deaths",
    y = "Country/Region"
  )
# Save the chart that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Deaths_20_countries_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# Global confirmed-cases time series: one row per location, one column per
# date. The slash-style region columns are renamed to dotted names.
time_series_confirmed <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")),
  Province.State = `Province/State`,
  Country.Region = `Country/Region`
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows
head(time_series_confirmed)
## # A tibble: 6 x 68
## Province.State Country.Region Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> Afghanistan 33 65 0 0 0
## 2 <NA> Albania 41.2 20.2 0 0 0
## 3 <NA> Algeria 28.0 1.66 0 0 0
## 4 <NA> Andorra 42.5 1.52 0 0 0
## 5 <NA> Angola -11.2 17.9 0 0 0
## 6 <NA> Antigua and B… 17.1 -61.8 0 0 0
## # … with 61 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## # `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## # `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## # `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## # `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## # `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## # `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## # `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## # `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## # `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## # `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## # `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## # `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## # `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## # `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## # `3/24/20` <dbl>, `3/25/20` <dbl>
# Column types and dimensions
str(time_series_confirmed)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 245 obs. of 68 variables:
## $ Province.State: chr NA NA NA NA ...
## $ Country.Region: chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ Lat : num 33 41.2 28 42.5 -11.2 ...
## $ Long : num 65 20.17 1.66 1.52 17.87 ...
## $ 1/22/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/23/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/24/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/25/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/26/20 : num 0 0 0 0 0 0 0 0 0 3 ...
## $ 1/27/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 1/28/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 1/29/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 1/30/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 1/31/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/1/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/2/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/3/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/4/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/5/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/6/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/7/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/8/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/9/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/10/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/11/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/12/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/13/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/14/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/15/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/16/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/17/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/18/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/19/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/20/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/21/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/22/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/23/20 : num 0 0 0 0 0 0 0 0 0 4 ...
## $ 2/24/20 : num 1 0 0 0 0 0 0 0 0 4 ...
## $ 2/25/20 : num 1 0 1 0 0 0 0 0 0 4 ...
## $ 2/26/20 : num 1 0 1 0 0 0 0 0 0 4 ...
## $ 2/27/20 : num 1 0 1 0 0 0 0 0 0 4 ...
## $ 2/28/20 : num 1 0 1 0 0 0 0 0 0 4 ...
## $ 2/29/20 : num 1 0 1 0 0 0 0 0 0 4 ...
## $ 3/1/20 : num 1 0 1 0 0 0 0 1 0 6 ...
## $ 3/2/20 : num 1 0 3 1 0 0 0 1 0 6 ...
## $ 3/3/20 : num 1 0 5 1 0 0 1 1 0 13 ...
## $ 3/4/20 : num 1 0 12 1 0 0 1 1 0 22 ...
## $ 3/5/20 : num 1 0 12 1 0 0 1 1 0 22 ...
## $ 3/6/20 : num 1 0 17 1 0 0 2 1 0 26 ...
## $ 3/7/20 : num 1 0 17 1 0 0 8 1 0 28 ...
## $ 3/8/20 : num 4 0 19 1 0 0 12 1 0 38 ...
## $ 3/9/20 : num 4 2 20 1 0 0 12 1 0 48 ...
## $ 3/10/20 : num 5 10 20 1 0 0 17 1 0 55 ...
## $ 3/11/20 : num 7 12 20 1 0 0 19 1 0 65 ...
## $ 3/12/20 : num 7 23 24 1 0 0 19 4 0 65 ...
## $ 3/13/20 : num 7 33 26 1 0 1 31 8 1 92 ...
## $ 3/14/20 : num 11 38 37 1 0 1 34 18 1 112 ...
## $ 3/15/20 : num 16 42 48 1 0 1 45 26 1 134 ...
## $ 3/16/20 : num 21 51 54 2 0 1 56 52 2 171 ...
## $ 3/17/20 : num 22 55 60 39 0 1 68 78 2 210 ...
## $ 3/18/20 : num 22 59 74 39 0 1 79 84 3 267 ...
## $ 3/19/20 : num 22 64 87 53 0 1 97 115 4 307 ...
## $ 3/20/20 : num 24 70 90 75 1 1 128 136 6 353 ...
## $ 3/21/20 : num 24 76 139 88 2 1 158 160 9 436 ...
## $ 3/22/20 : num 40 89 201 113 2 1 266 194 19 669 ...
## $ 3/23/20 : num 40 104 230 133 3 3 301 235 32 669 ...
## $ 3/24/20 : num 74 123 264 164 3 3 387 249 39 818 ...
## $ 3/25/20 : num 84 146 302 188 3 ...
## - attr(*, "spec")=
## .. cols(
## .. `Province/State` = col_character(),
## .. `Country/Region` = col_character(),
## .. Lat = col_double(),
## .. Long = col_double(),
## .. `1/22/20` = col_double(),
## .. `1/23/20` = col_double(),
## .. `1/24/20` = col_double(),
## .. `1/25/20` = col_double(),
## .. `1/26/20` = col_double(),
## .. `1/27/20` = col_double(),
## .. `1/28/20` = col_double(),
## .. `1/29/20` = col_double(),
## .. `1/30/20` = col_double(),
## .. `1/31/20` = col_double(),
## .. `2/1/20` = col_double(),
## .. `2/2/20` = col_double(),
## .. `2/3/20` = col_double(),
## .. `2/4/20` = col_double(),
## .. `2/5/20` = col_double(),
## .. `2/6/20` = col_double(),
## .. `2/7/20` = col_double(),
## .. `2/8/20` = col_double(),
## .. `2/9/20` = col_double(),
## .. `2/10/20` = col_double(),
## .. `2/11/20` = col_double(),
## .. `2/12/20` = col_double(),
## .. `2/13/20` = col_double(),
## .. `2/14/20` = col_double(),
## .. `2/15/20` = col_double(),
## .. `2/16/20` = col_double(),
## .. `2/17/20` = col_double(),
## .. `2/18/20` = col_double(),
## .. `2/19/20` = col_double(),
## .. `2/20/20` = col_double(),
## .. `2/21/20` = col_double(),
## .. `2/22/20` = col_double(),
## .. `2/23/20` = col_double(),
## .. `2/24/20` = col_double(),
## .. `2/25/20` = col_double(),
## .. `2/26/20` = col_double(),
## .. `2/27/20` = col_double(),
## .. `2/28/20` = col_double(),
## .. `2/29/20` = col_double(),
## .. `3/1/20` = col_double(),
## .. `3/2/20` = col_double(),
## .. `3/3/20` = col_double(),
## .. `3/4/20` = col_double(),
## .. `3/5/20` = col_double(),
## .. `3/6/20` = col_double(),
## .. `3/7/20` = col_double(),
## .. `3/8/20` = col_double(),
## .. `3/9/20` = col_double(),
## .. `3/10/20` = col_double(),
## .. `3/11/20` = col_double(),
## .. `3/12/20` = col_double(),
## .. `3/13/20` = col_double(),
## .. `3/14/20` = col_double(),
## .. `3/15/20` = col_double(),
## .. `3/16/20` = col_double(),
## .. `3/17/20` = col_double(),
## .. `3/18/20` = col_double(),
## .. `3/19/20` = col_double(),
## .. `3/20/20` = col_double(),
## .. `3/21/20` = col_double(),
## .. `3/22/20` = col_double(),
## .. `3/23/20` = col_double(),
## .. `3/24/20` = col_double(),
## .. `3/25/20` = col_double()
## .. )
# Global deaths time series: one row per location, one column per date.
# The slash-style region columns are renamed to dotted names.
time_series_deaths <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")),
  Province.State = `Province/State`,
  Country.Region = `Country/Region`
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the first rows
head(time_series_deaths)
## # A tibble: 6 x 68
## Province.State Country.Region Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> Afghanistan 33 65 0 0 0
## 2 <NA> Albania 41.2 20.2 0 0 0
## 3 <NA> Algeria 28.0 1.66 0 0 0
## 4 <NA> Andorra 42.5 1.52 0 0 0
## 5 <NA> Angola -11.2 17.9 0 0 0
## 6 <NA> Antigua and B… 17.1 -61.8 0 0 0
## # … with 61 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## # `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## # `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## # `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## # `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## # `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## # `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## # `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## # `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## # `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## # `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## # `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## # `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## # `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## # `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## # `3/24/20` <dbl>, `3/25/20` <dbl>
# Column types and dimensions
str(time_series_deaths)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 245 obs. of 68 variables:
## $ Province.State: chr NA NA NA NA ...
## $ Country.Region: chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ Lat : num 33 41.2 28 42.5 -11.2 ...
## $ Long : num 65 20.17 1.66 1.52 17.87 ...
## $ 1/22/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/23/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/24/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/25/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/26/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/27/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/28/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/29/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/30/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 1/31/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/1/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/2/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/3/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/4/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/5/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/6/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/7/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/8/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/9/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/10/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/11/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/12/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/13/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/14/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/15/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/16/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/17/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/18/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/19/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/20/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/21/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/22/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/23/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/24/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/25/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/26/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/27/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/28/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 2/29/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 3/1/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 3/2/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 3/3/20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ 3/4/20 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ 3/5/20 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ 3/6/20 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ 3/7/20 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ 3/8/20 : num 0 0 0 0 0 0 1 0 0 2 ...
## $ 3/9/20 : num 0 0 0 0 0 0 1 0 0 2 ...
## $ 3/10/20 : num 0 0 0 0 0 0 1 0 0 2 ...
## $ 3/11/20 : num 0 1 0 0 0 0 1 0 0 2 ...
## $ 3/12/20 : num 0 1 1 0 0 0 1 0 0 2 ...
## $ 3/13/20 : num 0 1 2 0 0 0 2 0 0 2 ...
## $ 3/14/20 : num 0 1 3 0 0 0 2 0 0 2 ...
## $ 3/15/20 : num 0 1 4 0 0 0 2 0 0 2 ...
## $ 3/16/20 : num 0 1 4 0 0 0 2 0 0 2 ...
## $ 3/17/20 : num 0 1 4 0 0 0 2 0 0 4 ...
## $ 3/18/20 : num 0 2 7 0 0 0 2 0 0 5 ...
## $ 3/19/20 : num 0 2 9 0 0 0 3 0 0 5 ...
## $ 3/20/20 : num 0 2 11 0 0 0 3 0 0 6 ...
## $ 3/21/20 : num 0 2 15 0 0 0 4 0 0 6 ...
## $ 3/22/20 : num 1 2 17 1 0 0 4 0 0 6 ...
## $ 3/23/20 : num 1 4 17 1 0 0 4 0 0 6 ...
## $ 3/24/20 : num 1 5 19 1 0 0 6 0 0 7 ...
## $ 3/25/20 : num 2 5 21 1 0 0 8 0 0 7 ...
## - attr(*, "spec")=
## .. cols(
## .. `Province/State` = col_character(),
## .. `Country/Region` = col_character(),
## .. Lat = col_double(),
## .. Long = col_double(),
## .. `1/22/20` = col_double(),
## .. `1/23/20` = col_double(),
## .. `1/24/20` = col_double(),
## .. `1/25/20` = col_double(),
## .. `1/26/20` = col_double(),
## .. `1/27/20` = col_double(),
## .. `1/28/20` = col_double(),
## .. `1/29/20` = col_double(),
## .. `1/30/20` = col_double(),
## .. `1/31/20` = col_double(),
## .. `2/1/20` = col_double(),
## .. `2/2/20` = col_double(),
## .. `2/3/20` = col_double(),
## .. `2/4/20` = col_double(),
## .. `2/5/20` = col_double(),
## .. `2/6/20` = col_double(),
## .. `2/7/20` = col_double(),
## .. `2/8/20` = col_double(),
## .. `2/9/20` = col_double(),
## .. `2/10/20` = col_double(),
## .. `2/11/20` = col_double(),
## .. `2/12/20` = col_double(),
## .. `2/13/20` = col_double(),
## .. `2/14/20` = col_double(),
## .. `2/15/20` = col_double(),
## .. `2/16/20` = col_double(),
## .. `2/17/20` = col_double(),
## .. `2/18/20` = col_double(),
## .. `2/19/20` = col_double(),
## .. `2/20/20` = col_double(),
## .. `2/21/20` = col_double(),
## .. `2/22/20` = col_double(),
## .. `2/23/20` = col_double(),
## .. `2/24/20` = col_double(),
## .. `2/25/20` = col_double(),
## .. `2/26/20` = col_double(),
## .. `2/27/20` = col_double(),
## .. `2/28/20` = col_double(),
## .. `2/29/20` = col_double(),
## .. `3/1/20` = col_double(),
## .. `3/2/20` = col_double(),
## .. `3/3/20` = col_double(),
## .. `3/4/20` = col_double(),
## .. `3/5/20` = col_double(),
## .. `3/6/20` = col_double(),
## .. `3/7/20` = col_double(),
## .. `3/8/20` = col_double(),
## .. `3/9/20` = col_double(),
## .. `3/10/20` = col_double(),
## .. `3/11/20` = col_double(),
## .. `3/12/20` = col_double(),
## .. `3/13/20` = col_double(),
## .. `3/14/20` = col_double(),
## .. `3/15/20` = col_double(),
## .. `3/16/20` = col_double(),
## .. `3/17/20` = col_double(),
## .. `3/18/20` = col_double(),
## .. `3/19/20` = col_double(),
## .. `3/20/20` = col_double(),
## .. `3/21/20` = col_double(),
## .. `3/22/20` = col_double(),
## .. `3/23/20` = col_double(),
## .. `3/24/20` = col_double(),
## .. `3/25/20` = col_double()
## .. )
# Reshape the wide confirmed-cases table to long form and total it per
# country/region and date (summing over provinces/states).
time_series_confirmed_long <- time_series_confirmed %>%
  pivot_longer(-c(Province.State, Country.Region, Lat, Long),
               names_to = "Date", values_to = "Confirmed") %>%
  # Convert the "m/d/yy" column names to Date before grouping, so rows are
  # ordered chronologically rather than as strings ("2/1/20" < "2/10/20" ...).
  mutate(Date = mdy(Date)) %>%
  group_by(Country.Region, Date) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # Country.Region grouping so later verbs operate on the whole table.
  ungroup()
head(time_series_confirmed_long)
## # A tibble: 6 x 3
## Country.Region Date Confirmed
## <chr> <date> <dbl>
## 1 Afghanistan 2020-01-22 0
## 2 Afghanistan 2020-01-23 0
## 3 Afghanistan 2020-01-24 0
## 4 Afghanistan 2020-01-25 0
## 5 Afghanistan 2020-01-26 0
## 6 Afghanistan 2020-01-27 0
# Confirmed cases over time for the US, drawn as points joined by a line.
ggplot(
  data = filter(time_series_confirmed_long, Country.Region == "US"),
  mapping = aes(Date, Confirmed)
) +
  geom_point() +
  geom_line() +
  labs(title = "US Confirmed COVID-19 Cases")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/US_all_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# Confirmed cases over time for Colombia, drawn as points joined by a line.
ggplot(
  data = filter(time_series_confirmed_long, Country.Region == "Colombia"),
  mapping = aes(Date, Confirmed)
) +
  geom_point() +
  geom_line() +
  labs(title = "Colombia Confirmed COVID-19 Cases")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_colombia_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# Confirmed cases over time for China, drawn as points joined by a line.
ggplot(
  data = filter(time_series_confirmed_long, Country.Region == "China"),
  mapping = aes(Date, Confirmed)
) +
  geom_point() +
  geom_line() +
  labs(title = "China Confirmed COVID-19 Cases")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_china_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# One panel per country (two columns), each with its own y scale, showing
# confirmed cases over time for six selected countries.
ggplot(
  data = filter(
    time_series_confirmed_long,
    Country.Region %in% c("China", "Japan", "Korea, South", "Italy", "Spain", "US")
  ),
  mapping = aes(Date, Confirmed)
) +
  geom_point() +
  geom_line() +
  labs(title = "Confirmed COVID-19 Cases") +
  facet_wrap(~ Country.Region, ncol = 2, scales = "free_y")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_comparison_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# One panel per country (two columns), each with its own y scale, showing
# confirmed cases over time for eight Latin American countries.
ggplot(
  data = filter(
    time_series_confirmed_long,
    Country.Region %in% c("Colombia", "Chile", "Argentina", "Brazil",
                          "Ecuador", "Peru", "Mexico", "Venezuela")
  ),
  mapping = aes(Date, Confirmed)
) +
  geom_point() +
  geom_line() +
  labs(title = "Confirmed COVID-19 Cases") +
  facet_wrap(~ Country.Region, ncol = 2, scales = "free_y")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_LatinAmerica_comparison_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# Eight-colour palette used for the manual colour scales
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)
# Confirmed cases over time for five countries on shared axes, one colour per
# country.
ggplot(
  data = filter(
    time_series_confirmed_long,
    Country.Region %in% c("China", "France", "Italy", "Korea, South", "US")
  ),
  mapping = aes(Date, Confirmed, colour = Country.Region)
) +
  geom_point() +
  geom_line() +
  scale_color_manual(values = cbPalette) +
  labs(title = "Confirmed COVID-19 Cases")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_all_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# Confirmed cases over time for eight Latin American countries on shared axes,
# one palette colour per country.
ggplot(
  data = filter(
    time_series_confirmed_long,
    Country.Region %in% c("Colombia", "Chile", "Argentina", "Brazil",
                          "Ecuador", "Peru", "Mexico", "Venezuela")
  ),
  mapping = aes(Date, Confirmed, colour = Country.Region)
) +
  geom_point() +
  geom_line() +
  scale_color_manual(values = cbPalette) +
  labs(title = "Confirmed COVID-19 Cases")
# Save the plot that was just drawn
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_all2_march_11.png",
  width = 10, height = 10, dpi = "screen"
)
# Dot plot of confirmed cases per US state/province in the 2020-03-24 report.
# Rows are summed per Province.State first, then ordered by the total.
report_03_24_2020 %>%
  filter(Country.Region == "US") %>%
  group_by(Province.State) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  ggplot(aes(x = Confirmed, y = reorder(Province.State, Confirmed))) +
  geom_point() +
  ggtitle("Confirmed cases for each US State") +
  # The y axis shows Province.State values, so label it accordingly
  # (it was previously labelled "Country/Region").
  ylab("Province/State") +
  xlab("Confirmed Cases March 24")
# ggsave() writes the most recently created plot
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Cases_US_march_24.png", width = 10, height = 10, dpi = "screen")
# Horizontal bar chart of the 20 countries/regions with the highest death
# totals in the 2020-03-24 report.
report_03_24_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(desc(Deaths)) %>%
  slice(1:20) %>%
  ggplot(aes(y = Deaths, x = reorder(Country.Region, Deaths))) +
  geom_bar(stat = "identity") +
  ggtitle("The 20 Countries with the most reported COV19-related deaths March 24") +
  # Axis labels follow the aesthetics, not the screen position: x is the
  # country and y is the death count, even though coord_flip() draws the
  # countries on the vertical axis. The two labels were previously swapped.
  xlab("Country/Region") +
  ylab("Deaths") +
  coord_flip()
# Saved under its own file name: "deaths_world_march_24.png" is also the
# target of the world-map ggsave() later in this script, which overwrote
# this chart.
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/Deaths_20_countries_march_24.png", width = 10, height = 10, dpi = "screen")
# Same top-20 deaths bar chart for the 2020-03-24 report, displayed only
# (no ggsave) and with a title that omits the date.
report_03_24_2020 %>%
  group_by(Country.Region) %>%
  summarise(Deaths = sum(Deaths)) %>%
  arrange(desc(Deaths)) %>%
  slice(1:20) %>%
  ggplot(aes(y = Deaths, x = reorder(Country.Region, Deaths))) +
  geom_bar(stat = "identity") +
  ggtitle("The 20 Countries with the most reported COV19-related deaths") +
  # Axis labels follow the aesthetics, not the screen position: x is the
  # country and y is the death count, even though coord_flip() draws the
  # countries on the vertical axis. The two labels were previously swapped.
  xlab("Country/Region") +
  ylab("Deaths") +
  coord_flip()
library(maps)
library(viridis)
# World basemap polygons and the legend break points used by the world maps
world <- map_data("world")
mybreaks <- c(1, 20, 100, 1000, 50000)
# World map of confirmed cases on 2020-02-25 (column `2/25/20`); point size
# and colour both encode the count on a log scale.
# NOTE(review): the warnings recorded below presumably come from locations
# with a count of 0, which the log transform turns into -Inf so those points
# are dropped — confirm.
ggplot() +
  geom_polygon(data = world, aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # stroke = 0 replaces `stroke=F`: same numeric value, but does not rely on
  # the reassignable shorthand `F`.
  geom_point(data = time_series_confirmed,
             aes(x = Long, y = Lat, size = `2/25/20`, color = `2/25/20`),
             stroke = 0, alpha = 0.7) +
  scale_size_continuous(name = "Cases", trans = "log", range = c(1, 7),
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  scale_color_viridis_c(option = "inferno", name = "Cases", trans = "log",
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  theme_void() +
  # Draw the colour scale as a discrete legend (rather than a colour bar)
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 Cases Feb 25/20")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 169 rows containing missing values (geom_point).
# File name fixed from "onfirmed_world_feb_25.png" (missing leading "c").
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/confirmed_world_feb_25.png", dpi = "screen")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 169 rows containing missing values (geom_point).
# World map of deaths on 2020-02-25 (column `2/25/20` of time_series_deaths);
# point size and colour both encode the count on a log scale.
# NOTE(review): the warnings recorded below presumably come from locations
# with a count of 0, which the log transform turns into -Inf so those points
# are dropped — confirm.
ggplot() +
  geom_polygon(data = world, aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # stroke = 0 replaces `stroke=F`: same numeric value, but does not rely on
  # the reassignable shorthand `F`.
  geom_point(data = time_series_deaths,
             aes(x = Long, y = Lat, size = `2/25/20`, color = `2/25/20`),
             stroke = 0, alpha = 0.7) +
  # Legends are named "Deaths" because this map plots time_series_deaths;
  # they were previously named "Cases".
  scale_size_continuous(name = "Deaths", trans = "log", range = c(1, 7),
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  scale_color_viridis_c(option = "inferno", name = "Deaths", trans = "log",
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  theme_void() +
  # Draw the colour scale as a discrete legend (rather than a colour bar)
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 deaths Feb 25/20")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 211 rows containing missing values (geom_point).
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/deaths_world_feb_25.png", dpi = "screen")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 211 rows containing missing values (geom_point).
# World map of confirmed cases on 2020-03-24 (column `3/24/20`); point size
# and colour both encode the count on a log scale.
# NOTE(review): the warnings recorded below presumably come from locations
# with a count of 0, which the log transform turns into -Inf so those points
# are dropped — confirm.
ggplot() +
  geom_polygon(data = world, aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # stroke = 0 replaces `stroke=F`: same numeric value, but does not rely on
  # the reassignable shorthand `F`.
  geom_point(data = time_series_confirmed,
             aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
             stroke = 0, alpha = 0.7) +
  scale_size_continuous(name = "Cases", trans = "log", range = c(1, 7),
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  scale_color_viridis_c(option = "inferno", name = "Cases", trans = "log",
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  theme_void() +
  # Draw the colour scale as a discrete legend (rather than a colour bar)
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 Cases March 24/20")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 5 rows containing missing values (geom_point).
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/confirmed_world_march_24.png", dpi = "screen")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 5 rows containing missing values (geom_point).
# World map of deaths on 2020-03-24 (column `3/24/20` of time_series_deaths);
# point size and colour both encode the count on a log scale.
# NOTE(review): the warnings recorded below presumably come from locations
# with a count of 0, which the log transform turns into -Inf so those points
# are dropped — confirm.
ggplot() +
  geom_polygon(data = world, aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # stroke = 0 replaces `stroke=F`: same numeric value, but does not rely on
  # the reassignable shorthand `F`.
  geom_point(data = time_series_deaths,
             aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
             stroke = 0, alpha = 0.7) +
  scale_size_continuous(name = "Deaths", trans = "log", range = c(1, 7),
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  scale_color_viridis_c(option = "inferno", name = "Deaths", trans = "log",
                        breaks = mybreaks,
                        labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) +
  theme_void() +
  # Draw the colour scale as a discrete legend (rather than a colour bar)
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  ggtitle("Confirmed COVID-19 deaths March 24/20")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 118 rows containing missing values (geom_point).
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/deaths_world_march_24.png", dpi = "screen")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 118 rows containing missing values (geom_point).
# Long-form cumulative confirmed cases per coordinate pair ("Place") and date,
# with a running day counter, for the faceted spread maps.
time_series_confirmed_long2 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province.State = "Province/State", Country.Region = "Country/Region") %>%
  pivot_longer(-c(Province.State, Country.Region, Lat, Long),
               names_to = "Date", values_to = "cumulative_cases") %>%
  # NOTE(review): every date is shifted back one day (the first column is
  # 1/22/20 but the first Date below is 2020-01-21) — confirm this is intended.
  mutate(Date = mdy(Date) - days(1),
         Place = paste(Lat, Long, sep = "_")) %>%
  group_by(Place, Date) %>%
  summarise(cumulative_cases = sum(cumulative_cases),
            Lat = mean(Lat),
            Long = mean(Long)) %>%
  # Drop the remaining Place grouping so the steps below run on whole columns.
  ungroup() %>%
  # Totals that are not positive become NA so no point is drawn for them.
  # Vectorised if_else() replaces the per-group scalar ifelse(), which also
  # computed the sum twice.
  mutate(cumulative_cases = if_else(cumulative_cases > 0,
                                    cumulative_cases, NA_real_),
         # Days elapsed since the earliest date in the table
         Pandemic_day = as.numeric(Date - min(Date)))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_confirmed_long2)
## # A tibble: 6 x 6
## Place Date cumulative_cases Lat Long Pandemic_day
## <chr> <date> <dbl> <dbl> <dbl> <dbl>
## 1 -0.0236_37.9062 2020-01-21 NA -0.0236 37.9 0
## 2 -0.0236_37.9062 2020-01-22 NA -0.0236 37.9 1
## 3 -0.0236_37.9062 2020-01-23 NA -0.0236 37.9 2
## 4 -0.0236_37.9062 2020-01-24 NA -0.0236 37.9 3
## 5 -0.0236_37.9062 2020-01-25 NA -0.0236 37.9 4
## 6 -0.0236_37.9062 2020-01-26 NA -0.0236 37.9 5
# Small-multiple world maps of cumulative cases: one panel for every 7th date
# between the first and last date, with point size showing cases in thousands.
# Rows whose cumulative_cases is NA are not drawn (see the warning below).
ggplot(subset(time_series_confirmed_long2,
              Date %in% seq(min(Date), max(Date), 7)),
       aes(x = Long, y = Lat, size = cumulative_cases / 1000)) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  # Legend label fixed: the original had an unbalanced ")" ("Cases (x1000))").
  labs(title = "COVID-19 spread", x = "", y = "",
       size = "Cases (x1000)") +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1) +
  facet_wrap(. ~ Date, nrow = 3)
## Warning: Removed 1316 rows containing missing values (geom_point).
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/static.png", dpi = "screen")
## Saving 7 x 5 in image
## Warning: Removed 1316 rows containing missing values (geom_point).
# Countries shown on the regional maps. Kept in a single vector so the map
# polygons and both time-series filters are guaranteed to use the same list
# (it was previously written out three times, and `some.latin` first held the
# names and was then overwritten with the map data).
latin_countries <- c(
  "Colombia", "Brazil", "Peru", "Ecuador", "Chile",
  "Venezuela", "Bolivia", "Argentina", "Paraguay", "Uruguay"
)
# Map polygons for those countries
some.latin <- map_data("world", region = latin_countries)
# Label position for each country: the mean of its polygon vertices
region.coord.data <- some.latin %>%
  group_by(region) %>%
  summarise(long = mean(long), lat = mean(lat))
# Confirmed cases of COVID-19 for the selected countries
time_series_confirmed_latin <- time_series_confirmed %>%
  filter(Country.Region %in% latin_countries)
# Confirmed deaths of COVID-19 for the selected countries
time_series_deaths_latin <- time_series_deaths %>%
  filter(Country.Region %in% latin_countries)
# Map of confirmed cases reported in the `3/24/20` column, drawn over the
# country polygons. Size and colour both encode the case count on a log scale
# and share breaks and labels so that they merge into one legend.
mybreaks2 <- c(1, 50, 100, 200, 300, 1000, 2000)
# One range label per break. Fixed: the 300 break used to read "300-399" even
# though the next break is 1000, leaving 400-999 unlabelled.
mylabels2 <- c(
  "1-49", "50-99", "100-199", "200-299", "300-999", "1000-1999", "2000+"
)
ggplot() +
  geom_polygon(
    data = some.latin,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3, colour = "white"
  ) +
  geom_point(
    data = time_series_confirmed_latin,
    aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
    # stroke is a numeric width; 0 replaces the reassignable shorthand `F`.
    stroke = 0, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 20),
    breaks = mybreaks2, labels = mylabels2
  ) +
  # scale_alpha_continuous(name="Cases", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Cases", trans = "log",
    breaks = mybreaks2, labels = mylabels2
  ) +
  theme_void() +
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30),
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  geom_text(
    aes(x = long, y = lat, label = region),
    data = region.coord.data, size = 5, hjust = 0.5
  ) +
  ggtitle("COVID-19 Confirmed Cases in South America reported on March 24/20")
# NOTE(review): the destination is a user-specific absolute path.
ggsave("/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/COVID-19-Confirmed-Cases-South-America-March-24.png", width = 14, height = 12, dpi = "screen")
# Map of deaths reported in the `3/24/20` column, drawn over the country
# polygons. Size and colour both encode the death count on a log scale and
# share breaks and labels so that they merge into one legend. Countries with
# zero deaths become -Inf under the log transform and are dropped by ggplot2
# with a warning.
mybreaks3 <- c(5, 15, 25, 35, 45)
# One ten-wide range label per break. Fixed: the labels used to read
# "20-39", "40-49", "50+" for the 25, 35 and 45 breaks, which did not match
# the break values.
mylabels3 <- c("0-9", "10-19", "20-29", "30-39", "40-49")
ggplot() +
  geom_polygon(
    data = some.latin,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3, colour = "white"
  ) +
  geom_point(
    data = time_series_deaths_latin,
    aes(x = Long, y = Lat, size = `3/24/20`, color = `3/24/20`),
    # stroke is a numeric width; 0 replaces the reassignable shorthand `F`.
    stroke = 0, alpha = 0.7
  ) +
  scale_size_continuous(
    name = "Deaths", trans = "log", range = c(1, 20),
    breaks = mybreaks3, labels = mylabels3
  ) +
  # scale_alpha_continuous(name="Deaths", trans="log", range=c(0.1, 0.9),breaks=mybreaks) +
  scale_color_viridis_c(
    option = "inferno", name = "Deaths", trans = "log",
    breaks = mybreaks3, labels = mylabels3
  ) +
  theme_void() +
  guides(colour = guide_legend()) +
  labs(caption = "") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30),
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  geom_text(
    aes(x = long, y = lat, label = region),
    data = region.coord.data, size = 5, hjust = 0.5
  ) +
  ggtitle("COVID-19 Confirmed Deaths in South America reported on March 24/20")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 3 rows containing missing values (geom_point).
# Save the most recently displayed plot as a 14 x 12 PNG at screen resolution.
# NOTE(review): the destination is a user-specific absolute path.
ggsave(
  filename = "/Users/katherinechaconvargas/Dropbox/2020_UMASS/Human_Genome_Class_Jeffrey/images/COVID-19-Confirmed-deaths-South-America-March-24.png",
  plot = last_plot(),
  width = 14,
  height = 12,
  dpi = "screen"
)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 3 rows containing missing values (geom_point).
# Latin cases: long-format cumulative confirmed cases per location and day for
# the selected countries, keeping only rows with a positive total.
time_series_confirmed_long2_latin <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv" %>%
  url() %>%
  read_csv() %>%
  rename(
    Country.Region = "Country/Region",
    Province.State = "Province/State"
  ) %>%
  filter(
    Country.Region %in% c("Colombia", "Brazil", "Peru", "Ecuador", "Chile", "Venezuela", "Bolivia", "Argentina", "Paraguay", "Uruguay")
  ) %>%
  pivot_longer(
    cols = -c(Province.State, Country.Region, Lat, Long),
    names_to = "Date",
    values_to = "cumulative_cases"
  ) %>%
  mutate(
    Place = paste(Lat, Long, sep = "_"),
    # Column headers are m/d/y strings; each parsed date is moved back one day.
    # NOTE(review): the reason for the one-day shift is not evident here.
    Date = mdy(Date) - days(1)
  ) %>%
  group_by(Place, Date) %>%
  summarise(
    cumulative_cases = sum(cumulative_cases),
    Lat = mean(Lat),
    Long = mean(Long)
  ) %>%
  # Pandemic_day is computed before the NA rows are removed, so it counts from
  # the first date in the series for each place, not from its first case.
  mutate(
    cumulative_cases = ifelse(cumulative_cases > 0, cumulative_cases, NA_real_),
    Pandemic_day = as.numeric(Date - min(Date))
  ) %>%
  filter(!is.na(cumulative_cases))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Regional map of cumulative confirmed cases, one panel for every fourth date
# in the series. Point size is mapped to cases divided by 100, and country
# names from region.coord.data are drawn in every panel.
ggplot(
  subset(
    time_series_confirmed_long2_latin,
    Date %in% seq(min(Date), max(Date), by = 4)
  ),
  aes(x = Long, y = Lat, size = cumulative_cases / 100)
) +
  borders(
    "world",
    colour = NA, fill = "grey90",
    regions = c("Colombia", "Brazil", "Peru", "Ecuador", "Chile", "Venezuela", "Bolivia", "Argentina", "Paraguay", "Uruguay")
  ) +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "COVID-19 spread",
    x = "",
    y = "",
    # Fixed: the legend title said "x1000" (with a stray ")") although the
    # size aesthetic divides by 100.
    size = "Cases (x100)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1) +
  facet_wrap(. ~ Date, nrow = 3) +
  geom_text(
    aes(x = long, y = lat, label = region),
    data = region.coord.data, size = 2, hjust = 0.5
  )
# Rows of `world` whose region is Colombia.
map_colombia <- filter(world, region == "Colombia")
# Colombia cases: long-format cumulative confirmed cases per location and day
# for Colombia only, keeping only rows with a positive total.
time_series_confirmed_long2_Colombia <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv" %>%
  url() %>%
  read_csv() %>%
  rename(
    Country.Region = "Country/Region",
    Province.State = "Province/State"
  ) %>%
  filter(Country.Region == "Colombia") %>%
  pivot_longer(
    cols = -c(Province.State, Country.Region, Lat, Long),
    names_to = "Date",
    values_to = "cumulative_cases"
  ) %>%
  mutate(
    Place = paste(Lat, Long, sep = "_"),
    # Column headers are m/d/y strings; each parsed date is moved back one day.
    # NOTE(review): the reason for the one-day shift is not evident here.
    Date = mdy(Date) - days(1)
  ) %>%
  group_by(Place, Date) %>%
  summarise(
    cumulative_cases = sum(cumulative_cases),
    Lat = mean(Lat),
    Long = mean(Long)
  ) %>%
  # Pandemic_day is computed before the NA rows are removed, so it counts from
  # the first date in the series, not from the first reported case.
  mutate(
    cumulative_cases = ifelse(cumulative_cases > 0, cumulative_cases, NA_real_),
    Pandemic_day = as.numeric(Date - min(Date))
  ) %>%
  filter(!is.na(cumulative_cases))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Map of cumulative confirmed cases in Colombia, one panel for every third
# date in the series. Point size is mapped to cases divided by 100.
ggplot(
  subset(
    time_series_confirmed_long2_Colombia,
    Date %in% seq(min(Date), max(Date), by = 3)
  ),
  aes(x = Long, y = Lat, size = cumulative_cases / 100)
) +
  borders("world", colour = NA, fill = "grey90", regions = "Colombia") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "COVID-19 spread",
    x = "",
    y = "",
    # Fixed: the legend title said "x1000" (with a stray ")") although the
    # size aesthetic divides by 100.
    size = "Cases (x100)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1) +
  facet_wrap(. ~ Date, nrow = 3)
```